# # -*- coding: utf-8 -*-
# import random
#
# import math
# from scrapy import Request
# from scrapy.exceptions import IgnoreRequest
# from scrapy.utils.project import get_project_settings
# from datetime import datetime
#
# from zc_core.util import file_reader
# from zc_core.util.batch_gen import time_to_batch_no
# from zc_core.util.http_util import retry_request
# from epec.rules import *
#
#
# class SearchSpider(BaseSpider):
#     name = "search"
#     # Category page URL
#     cata_url = "https://mall.epec.com/ecmall/static/epec/ecmall/html/electronicSupermarket/marketCategory.html"
#     # Product list page URL
#     sku_list_url = "https://mall.epec.com/ecmall/search/productInfoSearchFilterList.do"
#
#     # Home page URL
#     index_url = "https://mall.epec.com"
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(SearchSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#         # The API's page size is fixed
#         self.page_size = 6
#         self.max_page_limit = math.ceil(5000 / self.page_size)
#
#     def _build_list_req(self, callback, page, kw):
#         data = {
#             'page': '1',
#             'searchWord': kw,
#             'selectedCatagoryList': '',
#             'catagoryValueList': '',
#             'applyProcurementProduct': '0',
#             'procurementProduct': '1',
#             'sortText': 'zonghe;-;zonghe',
#             'pageLimit': str(self.page_size),
#             'pageStart': str((page - 1) * self.page_size)
#         }
#         return scrapy.FormRequest(
#             url=self.sku_list_url,
#             method="POST",
#             meta={
#                 'reqType': 'sku',
#                 'page': page,
#                 'batchNo': self.batch_no,
#                 'kw': kw,
#             },
#             headers={
#                 'Content-Type': 'application/x-www-form-urlencoded'
#             },
#             formdata=data,
#             callback=callback,
#             errback=self.error_back,
#         )
#
#     def start_requests(self):
#         # Load the search keywords from doc/kw.txt
#         kw_list = file_reader.read_rows('doc/kw.txt')
#         if kw_list:
#             self.logger.info('目标：%s' % (len(kw_list)))
#             random.shuffle(kw_list)
#             for kw in kw_list:
#                 # Query SKUs for this keyword, starting at the first page
#                 page = 1
#                 yield self._build_list_req(callback=self.parse_sku_list, page=page, kw=kw)
#
#     def parse_sku_list(self, response):
#         if response and response.text:
#             meta = response.meta
#             page = meta.get("page")
#             kw = meta.get('kw')
#
#             sku_list, item_list = parse_sku(response)
#             if sku_list and len(sku_list) > 0:
#                 self.logger.info('清单: kw=%s, page=%s, count=%s' % (kw, page, len(sku_list)))
#                 yield Box('sku', self.batch_no, sku_list)
#                 if item_list and len(item_list) > 0:
#                     yield Box('item', self.batch_no, item_list)
#
#                 if page == 1:
#                     total_page = parse_total_page(response, self.page_size)
#                     self.logger.info("页数: kw=%s, total=%s" % (kw, total_page))
#                     if total_page < self.max_page_limit:
#                         for page_cur in range(2, total_page + 1):
#                             yield self._build_list_req(callback=self.parse_sku_list, page=page_cur, kw=kw)
#                     else:
#                         self.logger.info("超限1: kw=%s, total=%s" % (kw, total_page))
#
#             else:
#                 self.logger.info('空页: kw=%s, page=%s' % (kw, page))

